libname nbbo '/wrds/nyse/sasdata/taqms/nbbo';
libname cq '/wrds/nyse/sasdata/taqms/cq';
libname ct '/wrds/nyse/sasdata/taqms/ct';
libname yashar '/scratch/insitution/2013';

options nomprint nosource nonotes;
ods listing close;

%macro do_merge();
%let z=1;

%let stock=
AAA
;

%do %while(%scan(&stock,&z) ne );
%let k=%scan(&stock,&z);


/* load daily NBBO data for stock k */

	%macro combine;
	
	data DailyNBBO_&k;
	set
		%do i=1 %to 12;
		%let prefix1=0;
		%if &&i>9 %then %let prefix1=;
			
			%do j=1 %to 31;
			%let prefix2=0;
			%if &&j>9 %then %let prefix2=;
			% let ex=%sysfunc(exist(nbbo.nbbom_2013&&prefix1.&&i&&prefix2.&&j,data));
			%if &&ex=1 %then %do;
			nbbo.nbbom_2013&&prefix1.&&i&&prefix2.&&j (where=(sym_root="&k" and time_m between "09:30:00.000000"t and "16:00:00.000000"t))
			%end;
			%end;
		%end;
		;
	format time_m part_time trf_time TIME20.6;
	run;
	%mend;
	%combine;

/* load daily quoate data for stock k */

	%macro combine;
	
	data DailyQuote_&k;
	set
		%do i=1 %to 12;
		%let prefix1=0;
		%if &&i>9 %then %let prefix1=;
			
			%do j=1 %to 31;
			%let prefix2=0;
			%if &&j>9 %then %let prefix2=;
			% let ex=%sysfunc(exist(cq.cqm_2013&&prefix1.&&i&&prefix2.&&j,data));
			%if &&ex=1 %then %do;
			cq.cqm_2013&&prefix1.&&i&&prefix2.&&j (where=(sym_root="&k" and time_m between "09:30:00.000000"t and "16:00:00.000000"t))
			%end;
			%end;
		%end;
		;
	format time_m part_time trf_time TIME20.6;
	run;
	%mend;
	%combine;


/* load daily trade data for stock k */

	%macro combine;
	
	data DailyTrade_&k;
	set
		%do i=1 %to 12;
		%let prefix1=0;
		%if &&i>9 %then %let prefix1=;
			
			%do j=1 %to 31;
			%let prefix2=0;
			%if &&j>9 %then %let prefix2=;
			% let ex=%sysfunc(exist(ct.ctm_2013&&prefix1.&&i&&prefix2.&&j,data));
			%if &&ex=1 %then %do;
			ct.ctm_2013&&prefix1.&&i&&prefix2.&&j (where=(sym_root="&k" and time_m between "09:30:00.000000"t and "16:00:00.000000"t and EX='D'))
			%end;
			%end;
		%end;
		;type='T';
	format time_m part_time trf_time TIME20.6;
	run;
	%mend;
	%combine;



/* clean the NBBO data */

	data DailyNBBO_&k;
		set DailyNBBO_&k;
		/* normal code condition imposed (A,B,H,O,R,W) */
		if Qu_Cond not in ('A','B','H','O','R','W') then delete;

		/* clncelel quotes deleted */
		if Qu_Cancel='B' then delete;

		/* drop if both ask and bid are 0 or mising */
		if Best_Ask le 0 and Best_Bid le 0 then delete;
		if Best_Asksiz le 0 and Best_Bidsiz le 0 then delete;
		if Best_Ask = . and Best_Bid = . then delete;
		if Best_Asksiz = . and Best_Bidsiz = . then delete;

		/* Create spread and midpoint */
		Spread=Best_Ask-Best_Bid;
		Midpoint=(Best_Ask+Best_Bid)/2;

		/* If size/price = 0 or . then price/size is set to . */
		    if Best_Ask le 0 then do;
		        Best_Ask=.;
		        Best_Asksiz=.;
		    end;
		    if Best_Ask=. then Best_Asksiz=.;
		    if Best_Asksiz le 0 then do;
		        Best_Ask=.;
		        Best_Asksiz=.;
		    end;
		    if Best_Asksiz=. then Best_Ask=.;
		    if Best_Bid le 0 then do;
		        Best_Bid=.;
		        Best_Bidsiz=.;
		    end;
		    if Best_Bid=. then Best_Bidsiz=.;
		    if Best_Bidsiz le 0 then do;
		        Best_Bid=.;
		        Best_Bidsiz=.;
		    end;
		    if Best_Bidsiz=. then Best_Bid=.;

		/*	Bid/Ask size are in round lots, replace with new shares variable*/
		Best_BidSizeShares = Best_BidSiz * 100;
		Best_AskSizeShares = Best_AskSiz * 100;
	run;

/* find previous midpoint */
	
	proc sort 
		data=DailyNBBO_&k (drop = Best_BidSiz Best_AskSiz);
		by sym_root date;
	run;

	data DailyNBBO_&k;
		set DailyNBBO_&k;
		by sym_root date;
		lmid=lag(Midpoint);
		if first.sym_root or first.date then lmid=.;
		lm25=lmid-2.5;
		lp25=lmid+2.5;
	run;

	/* If the quoted spread is greater than $5.00 and the bid (ask) price is less
	(greater) than the previous midpoint - $2.50 (previous midpoint + $2.50), 
	then the bid (ask) is not considered. */

	data DailyNBBO_&k;
		set DailyNBBO_&k;
		if Spread gt 5 and Best_Bid lt lm25 then do;
			Best_Bid=.;
			Best_BidSizeShares=.;
		end;
		if Spread gt 5 and Best_Ask gt lp25 then do;
		Best_Ask=.;
		Best_AskSizeShares=.;
		end;
		keep date time_m sym_root Best_Bidex Best_Bid Best_BidSizeShares Best_Askex 
		Best_Ask Best_AskSizeShares Qu_SeqNum;
	run;

	/* changes in NBBO identified in a new data set */

	data DailyNBBO2_&k;
		set DailyNBBO_&k;
		if sym_root ne lag(sym_root) 
		or date ne lag(date) 
		or Best_Ask ne lag(Best_Ask) 
		or Best_Bid ne lag(Best_Bid) 
		or Best_AskSizeShares ne lag(Best_AskSizeShares) 
		or Best_BidSizeShares ne lag(Best_BidSizeShares); 
	run;

/* clean the quote data */

	data DailyQuote_&k;
		set DailyQuote_&k;
		
		/* Create spread and midpoint*/;
		Spread=Ask-Bid;

		/* Delete if abnormal quote conditions */
		if Qu_Cond not in ('A','B','H','O','R','W')then delete; 

		/* Delete if abnormal crossed markets */
		if Bid>Ask then delete;

		/* Delete abnormal spreads*/
		if Spread>5 then delete;

		/* Delete withdrawn Quotes. This is 
	   		when an exchange temporarily has no quote, as indicated by quotes 
	   		with price or depth fields containing values less than or equal to 0 
	   		or equal to '.'. See discussion in Holden and Jacobsen (2014), 
	   		page 11. */
		if Ask le 0 or Ask =. then delete;
		if Asksiz le 0 or Asksiz =. then delete;
		if Bid le 0 or Bid =. then delete;
		if Bidsiz le 0 or Bidsiz =. then delete;
		drop Sym_Suffix Bidex Askex Qu_Cancel Qu_Source RPI SSR LULD_BBO_CQS LULD_BBO_UTP FINRA_ADF_MPID SIP_Message_ID Part_Time RRN TRF_Time Spread NATL_BBO_LULD;
		Q_id=_N_;	
	run;


/* clean the trade data */

	data DailyTrade_&k;
		set DailyTrade_&k;
		where Tr_Corr eq '00' and price gt 0;
		drop Tr_Corr Tr_Source TR_RF Part_Time RRN TRF_Time Sym_Suffix Tr_StopInd;
	run;


/* integrate NBBO and DailyQuote data to create THE complete NBBO (footnotes 6 and 24 of HJ(2014) at JF) */

	data DailyQuote_&k (rename=(Ask=Best_Ask Bid=Best_Bid));
		set DailyQuote_&k;
		where NatBBO_Ind='1';
		keep Q_id date time_m sym_root Qu_SeqNum Bid Best_BidSizeShares Ask Best_AskSizeShares;

		/*	Bid/Ask size are in round lots, replace with new shares variable
		and rename Best_BidSizeShares and Best_AskSizeShares*/
		
		Best_BidSizeShares = Bidsiz * 100;
		Best_AskSizeShares = Asksiz * 100;

	run;

	proc sort data=DailyNBBO2_&k;
		by sym_root date Qu_SeqNum;
	run;

	proc sort data=DailyQuote_&k;
		by sym_root date Qu_SeqNum;
	run;

	data CompleteDailyNBBO_&k (drop=Best_Askex Best_Bidex);
		set DailyNBBO2_&k DailyQuote_&k;
		by sym_root date Qu_SeqNum;
		retain Q_id2;
		if Q_id>. then Q_id2=Q_id;
		drop Q_id;
		rename Q_id2=Q_id;
	run;


	/* If the NBBO Contains two quotes in the exact same microseond, assume 
	last quotes (based on sequence number) is active one */

	proc sort data=CompleteDailyNBBO_&k;
		by sym_root date time_m descending Qu_SeqNum;
	run;

	proc sort data=CompleteDailyNBBO_&k nodupkey;
		by sym_root date time_m;
	run;



/* Match trades with the quotes that are in effect at the previous milli-second. */

	data CompleteDailyNBBO_&k;
		set CompleteDailyNBBO_&k;type='Q';
		time_m=time_m;
		drop Qu_SeqNum EX;
	run;

	proc sort data=CompleteDailyNBBO_&k;
		by sym_root date time_m;
	run;

	proc sort data=DailyTrade_&k;
		by sym_root date time_m Tr_SeqNum;
	run;

	data TradeNBBOQuotes_&k;
		set CompleteDailyNBBO_&k DailyTrade_&k;
		by sym_root date time_m type;
	run;

	data TradeNBBOQuotes_&k (drop=Best_Ask Best_Bid Best_AskSizeShares Best_BidSizeShares);
		set TradeNBBOQuotes_&k;
		by sym_root date;
		retain QTime NBO NBB NBOqty NBBqty;
		if first.sym_root or first.date and type='T' then do;
			QTime=.;
			NBO=.;
			NBB=.;
			NBOqty=.;
			NBBqty=.;
		end;

		if type='Q' then Qtime=time_m;
		else Qtime=Qtime;

		if type='Q' then NBO=Best_Ask;
		else NBO=NBO;

		if type='Q' then NBB=Best_Bid;
		else NBB=NBB;

		if type='Q' then NBOqty=Best_AskSizeShares;
		else NBOqty=NBOqty;
		
		if type='Q' then NBBqty=Best_BidSizeShares;
		else NBBqty=NBBqty;

		format Qtime TIME20.6;

		retain Q_id2;
		if Q_id>. then Q_id2=Q_id;
		drop Q_id;
		rename Q_id2=Q_id;

		if size=. then delete;

		dvol=PRICE*SIZE;
		resid=PRICE-(NBB+NBO)/2;
		if resid<0 then resid=-resid;
		block=0;
		if SIZE>999 then block=1;
		if dvol<50000 and block=1 then block=0;
		if resid>0.001 then block=0;
		blocknbbo=0;
		if SIZE>999 then blocknbbo=1;
		if dvol<50000 and blocknbbo=1 then blocknbbo=0;
		if abs(price-NBB)<0.001 then blocknbbo=0;
		if abs(price-NBO)<0.001 then blocknbbo=0;

	run;



	data dates_&k;
		set TradeNBBOQuotes_&k;
		by date;
		if first.date then d=1;
		if d ne 1 then delete;
		keep date sym_root;
	run;


	proc means data=TradeNBBOQuotes_&k (where=(block=1));
		class date;
		var size;
		output OUT=temp SUM=inst_mp_vol;
	run;

	proc sort data=dates_&k;
		by date;
	run;

	proc sort data=temp;
		by date;
	run;

	data comb_&k;
		merge dates_&k temp;
		by date;
		if _type_=0 then delete;
		drop _type_;
		rename _freq_=inst_mp_trd;
	run;

	proc means data=TradeNBBOQuotes_&k;
		class date;
		var size;
		output OUT=temp SUM=offex_vol;
	run;

	proc sort data=comb_&k;
		by date;
	run;

	proc sort data=temp;
		by date;
	run;

	data comb_&k;
		merge comb_&k temp;
		by date;
		if _type_=0 then delete;
		drop _type_;
		rename _freq_=offex_trd;
	run;


	proc means data=TradeNBBOQuotes_&k (where=(blocknbbo=1));
		class date;
		var size;
		output OUT=temp SUM=ins_nbbo_vol;
	run;

	proc sort data=comb_&k;
		by date;
	run;

	proc sort data=temp;
		by date;
	run;

	data comb_&k;
		merge comb_&k temp;
		by date;
		if _type_=0 then delete;
		drop _type_;
		rename _freq_=ins_nbbo_trd;
	run;


 	proc export data=comb_&k outfile="/scratch/institution/2013/DUR_&k..txt" dbms=TAB replace;
	run;

%let z=%eval(&z+1);
%end;
%mend;
%do_merge();
